//standardize assignee names

//STEP 1-- in STATA-- generate file for NLP firm disambiguation

// Build the assignee name/address list that is exported for firm disambiguation.

// Load the conveyance records; drop those with employer_assign == 0 and
// keep only conveyances typed "assignment".
use "${rawdata}assignment_data/assignment_conveyance.dta", clear
drop if employer_assign == 0
drop if convey_ty != "assignment"

// Attach the document records filed under each rf_id (one rf_id may have many).
merge 1:m rf_id using "${rawdata}assignment_data/documentid_admin.dta"

// Not redundant with the filter above: observations that exist only in
// documentid_admin.dta come out of the merge with an empty convey_ty
// (assuming that file carries no convey_ty of its own -- TODO confirm),
// so this removes the rf_ids that were filtered out of the master data.
drop if convey_ty != "assignment"
drop if error != "none"
drop _merge

// The per-rf_id and per-appno_doc_num counters that used to be generated here
// were never used (the keep below discarded them), so they have been removed.
sort rf_id

// Pair every remaining document row with all assignee rows sharing its rf_id.
// NOTE(review): an assignee row is therefore repeated once per document of its
// rf_id, so the CSV can contain duplicate lines -- confirm the Python step expects this.
joinby rf_id using "${rawdata}assignment_data/assignee.dta"

// Export only the assignee name and address fields.
keep ee_name ee_address_1 ee_address_2 ee_city ee_state ee_postcode ee_country

// replace: without it the do-file stops with an error on every re-run once the CSV exists.
outsheet using "${filedata}firm_for_disambig.csv", comma replace


//STEP 2-- in PYTHON-- NLP firm disambiguation
/*
Run scripts/ee_Name.py.
That script outputs disambig_results.csv.
*/

//STEP 3-- format the disambiguated firm data and merge it back onto the full sample of raw data (this comes later, in standardize_assignee.do)
